library(astsa)
library(fGarch)
library(fpp2)
library(latex2exp)
library(tidyverse)
library(TSA)
library(tseries)
library(xts)
#Custom Function for Creating Training and Testing Data
ts.train.test <- function(data, freq, p = 0.75){
total.length = length(data)
#Splitting up the Data
test.length = round(total.length * (1 - p), 0)
train.length = total.length - test.length
data.test = data[train.length:total.length]
data.train = data[1:(train.length - 1)]
#Coercing the data into time series format
data.test = ts(data.test, start = time(data)[train.length], frequency = freq)
data.train = ts(data.train, start = time(data)[1], frequency = freq)
#Returning a list of the training and testing data
x = list(data.train, data.test)
names(x) <- c("train","test")
return(x)
}
#Custom Function for Calculating RMSE
tsRMSE <- function(data, freq, prob = 0.75, p = 0, d = 0, q = 0, P = 0, D = 0, Q = 0, S = 1){
#Creating the training and testing splits
train = ts.train.test(data, freq, p = prob)$train
test = ts.train.test(data, freq, p = prob)$test
#Forecast modeling
model = sarima.for(train, p, d, q, P, D, Q, S, no.constant = TRUE,
n.ahead = length(test), plot = FALSE)
#RMSE Calculations
RMSE = sqrt(mean((model$pred - as.numeric(test))^2))
RMSE1.5 = sqrt(mean((model$pred[1:5] - as.numeric(test[1:5]))^2))
#Returning a list of the RMSE values
x = list(RMSE, RMSE1.5)
names(x) <- c("RMSE", "RMSE Obs. 1:5")
return(x)
}
#Custom Function for Calculating RMSE for GARCH
garchRMSE <- function(data, freq, prob = 0.75, p = 0, q = 0, alpha = 0, beta = 0){
#Creating the training and testing splits
train = ts.train.test(data, freq, p = prob)$train
test = ts.train.test(data, freq, p = prob)$test
#Forecast modeling
model = garchFit(substitute(~arma(p,q) + garch(alpha, beta)), train)
pred = predict(model, n.ahead = length(test))
#RMSE Calculations
RMSE = sqrt(mean((as.numeric(pred$meanForecast) - as.numeric(test))^2))
RMSE1.5 = sqrt(mean((as.numeric(pred$meanForecast[1:5]) - as.numeric(test[1:5]))^2))
#Returning a list of the RMSE values
x = list(RMSE, RMSE1.5)
names(x) <- c("RMSE", "RMSE Obs. 1:5")
return(x)
}
#Paul PC
#Beijing <- read.csv("~/Classes/STAT429 (UIUC)/Project/Data Sets/BeijingPM20100101_20151231.csv",
# stringsAsFactors=TRUE)
#Paul MacOS
Beijing <- read.csv("~/Desktop/Courses/STAT429 (UIUC)/Project/Data Sets/BeijingPM20100101_20151231.csv", stringsAsFactors=TRUE)
#Going by each recording station
Dongsi = Beijing %>% drop_na(PM_Dongsi)
Dongsihuan = Beijing %>% drop_na(PM_Dongsihuan)
Nongzhanguan = Beijing %>% drop_na(PM_Nongzhanguan)
USPost = Beijing %>% drop_na(PM_US.Post)
NA Checking#Looking for Columns with NA
result = data.frame(matrix(data = rep(0,5*ncol(Beijing)), nrow = ncol(Beijing)))
for(i in 1:ncol(Beijing)){
result[i,1] = any(is.na(Beijing[,i]))
result[i,2] = any(is.na(Dongsi[,i]))
result[i,3] = any(is.na(Dongsihuan[,i]))
result[i,4] = any(is.na(Nongzhanguan[,i]))
result[i,5] = any(is.na(USPost[,i]))
}
#Outputting Results
#Each row is a column in the data while the columns represent the data sets.
result = result %>% rename(Beijing = "X1") %>% rename(Dongsi = "X2") %>%
rename(Dongsihuan = "X3") %>% rename(Nongzhanguan = "X4") %>%
rename(USPost = "X5") %>%
mutate(Beijing = ifelse(Beijing == 0, "No", "Yes")) %>%
mutate(Dongsi = ifelse(Dongsi == 0, "No", "Yes")) %>%
mutate(Dongsihuan = ifelse(Dongsihuan == 0, "No", "Yes")) %>%
mutate(Nongzhanguan = ifelse(Nongzhanguan == 0, "No", "Yes")) %>%
mutate(USPost = ifelse(USPost == 0, "No", "Yes"))
rownames(result) = colnames(Beijing)
result
## Beijing Dongsi Dongsihuan Nongzhanguan USPost
## No No No No No No
## year No No No No No
## month No No No No No
## day No No No No No
## hour No No No No No
## season No No No No No
## PM_Dongsi Yes No Yes Yes Yes
## PM_Dongsihuan Yes Yes No Yes Yes
## PM_Nongzhanguan Yes Yes Yes No Yes
## PM_US.Post Yes Yes Yes Yes No
## DEWP Yes Yes Yes Yes Yes
## HUMI Yes Yes Yes Yes Yes
## PRES Yes Yes Yes Yes Yes
## TEMP Yes Yes Yes Yes Yes
## cbwd Yes Yes Yes Yes Yes
## Iws Yes Yes Yes Yes Yes
## precipitation Yes Yes Yes Yes Yes
## Iprec Yes Yes Yes Yes Yes
#Dongsi
Dongsi$Date = as.Date(with(Dongsi,paste(day,month,year,sep = "-")), "%d-%m-%Y")
Dongsi$Recorded = as.POSIXct(paste(Dongsi$Date, Dongsi$hour), format = "%Y-%m-%d %H")
Dongsi = Dongsi %>%
select(Date, Recorded, season, PM_Dongsi, DEWP, HUMI, PRES, TEMP, cbwd, Iws, precipitation,
Iprec)
#Dongsihuan
Dongsihuan$Date = as.Date(with(Dongsihuan,paste(day,month,year,sep = "-")), "%d-%m-%Y")
Dongsihuan$Recorded = as.POSIXct(paste(Dongsihuan$Date, Dongsihuan$hour), format = "%Y-%m-%d %H")
Dongsihuan = Dongsihuan %>%
select(Date, Recorded, season, PM_Dongsihuan, DEWP, HUMI, PRES, TEMP, cbwd, Iws, precipitation,
Iprec)
#Nongzhanguan
Nongzhanguan$Date = as.Date(with(Nongzhanguan,paste(day,month,year,sep = "-")), "%d-%m-%Y")
Nongzhanguan$Recorded = as.POSIXct(paste(Nongzhanguan$Date, Nongzhanguan$hour),
format = "%Y-%m-%d %H")
Nongzhanguan = Nongzhanguan %>%
select(Date, Recorded, season, PM_Nongzhanguan, DEWP, HUMI, PRES, TEMP, cbwd, Iws, precipitation,
Iprec)
#US Post
USPost$Date = as.Date(with(USPost,paste(day,month,year,sep = "-")), "%d-%m-%Y")
USPost$Recorded = as.POSIXct(paste(USPost$Date, USPost$hour), format = "%Y-%m-%d %H")
USPost = USPost %>%
select(Date, Recorded, season, PM_US.Post, DEWP, HUMI, PRES, TEMP, cbwd, Iws, precipitation,
Iprec)
#Sub-setting the aggregated mean PM 2.5 readings for each day in the four locations.
D = aggregate(PM_Dongsi ~ Date, Dongsi, mean)
DS = aggregate(PM_Dongsihuan ~ Date, Dongsihuan, mean)
N = aggregate(PM_Nongzhanguan ~ Date, Nongzhanguan, mean)
US = aggregate(PM_US.Post ~ Date, USPost, mean)
#Dongsi
tsplot(D$Date, D$PM_Dongsi, type = "l", xlab = "Date Recorded",
ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"), main = "Dongsi")
#Dongsihuan
tsplot(DS$Date, DS$PM_Dongsihuan, type = "l", xlab = "Date Recorded",
ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"), main = "Dongsihuan")
#Nongzhanguan
tsplot(N$Date, N$PM_Nongzhanguan, type = "l", xlab = "Date Recorded",
ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"), main = "Nongzhanguan")
#US Post
tsplot(US$Date, US$PM_US.Post, type = "l", xlab = "Date Recorded",
ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"), main = "US Post")
#Dongsi
periodogram(D$PM_Dongsi, main = "Dongsi")
#Dongsihuan
periodogram(DS$PM_Dongsihuan, main = "Dongsihuan")
#Nongzhanguan
periodogram(N$PM_Nongzhanguan, main = "Nongzhanguan")
#US Post
periodogram(US$PM_US.Post, main = "US Post")
#Smoothed Periodogram
smooth = mvspec(D$PM_Dongsi, spans = 15, col = "blue", lwd = 2)
#Determining Optimal Frequencies
freq = data.frame(smooth$freq, smooth$spec)
freq = freq %>% rename(Frequency = "smooth.freq") %>% rename(Spectrum = "smooth.spec") %>%
arrange(desc(Spectrum)) %>% filter(Spectrum >= 15000) %>% mutate(Cycle_Days = 1/Frequency)
freq
## Frequency Spectrum Cycle_Days
## 1 0.0018518519 35141.10 540.000000
## 2 0.0009259259 35025.04 1080.000000
## 3 0.0027777778 34332.40 360.000000
## 4 0.0037037037 29406.10 270.000000
## 5 0.0046296296 24469.97 216.000000
## 6 0.0055555556 23996.78 180.000000
## 7 0.0064814815 23573.63 154.285714
## 8 0.0074074074 23341.40 135.000000
## 9 0.0083333333 23220.27 120.000000
## 10 0.0092592593 18327.21 108.000000
## 11 0.0759259259 17549.32 13.170732
## 12 0.0750000000 16856.94 13.333333
## 13 0.0768518519 16763.61 13.012048
## 14 0.0777777778 16195.32 12.857143
## 15 0.0740740741 16024.85 13.500000
## 16 0.0787037037 15364.71 12.705882
## 17 0.1018518519 15292.38 9.818182
## 18 0.0731481481 15221.00 13.670886
## 19 0.1027777778 15059.53 9.729730
par(mfrow = c(2,3))
#Dongsi
tsplot(D$Date, D$PM_Dongsi, type = "l", xlab = "Date Recorded",
ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"), main = "Dongsi")
acf(D$PM_Dongsi, main = "")
pacf(D$PM_Dongsi, ylab = "PACF", main = "")
#Log transformation of original data
tsplot(D$Date, log(D$PM_Dongsi), type = "l", xlab = "Date Recorded",
ylab = TeX(r"(log(PM2.5 Concentrate); $\mu g/m^3$)"), main = "Dongsi")
acf(log(D$PM_Dongsi), main = "")
pacf(log(D$PM_Dongsi), ylab = "PACF", main = "")
par(mfrow = c(3,1))
#Adding 2 Week Difference
tsplot(diff(log(D$PM_Dongsi), 14),
type = "l", xlab = "Date Recorded",
ylab = TeX(r"(log(PM2.5 Concentrate); $\mu g/m^3$)"), main = "Dongsi")
acf(diff(log(D$PM_Dongsi, 14)), main = "", lag.max = 100)
pacf(diff(log(D$PM_Dongsi, 14)), ylab = "PACF", main = "", lag.max = 100)
#Log of Dongsi Data
adf.test(log(D$PM_Dongsi))
##
## Augmented Dickey-Fuller Test
##
## data: log(D$PM_Dongsi)
## Dickey-Fuller = -8.6211, Lag order = 10, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(log(D$PM_Dongsi))
##
## KPSS Test for Level Stationarity
##
## data: log(D$PM_Dongsi)
## KPSS Level = 0.77467, Truncation lag parameter = 7, p-value = 0.01
#Differenced Data
adf.test(diff(log(D$PM_Dongsi), 14))
##
## Augmented Dickey-Fuller Test
##
## data: diff(log(D$PM_Dongsi), 14)
## Dickey-Fuller = -8.4027, Lag order = 10, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(diff(log(D$PM_Dongsi), 14))
##
## KPSS Test for Level Stationarity
##
## data: diff(log(D$PM_Dongsi), 14)
## KPSS Level = 0.062366, Truncation lag parameter = 7, p-value = 0.1
#Original Idea
d1 = sarima(log(D$PM_Dongsi), p = 0, d = 0, q = 3, P = 0, D = 1, Q = 1, S = 14,
no.constant = TRUE)
## initial value 0.217436
## iter 2 value -0.121515
## iter 3 value -0.151691
## iter 4 value -0.183079
## iter 5 value -0.191276
## iter 6 value -0.191361
## iter 7 value -0.193062
## iter 8 value -0.193215
## iter 9 value -0.193246
## iter 10 value -0.193250
## iter 11 value -0.193250
## iter 12 value -0.193250
## iter 13 value -0.193250
## iter 13 value -0.193250
## iter 13 value -0.193250
## final value -0.193250
## converged
## initial value -0.211589
## iter 2 value -0.222983
## iter 3 value -0.223506
## iter 4 value -0.223820
## iter 5 value -0.223845
## iter 6 value -0.223849
## iter 7 value -0.223853
## iter 8 value -0.223853
## iter 8 value -0.223853
## iter 8 value -0.223853
## final value -0.223853
## converged
d1$ttable
## Estimate SE t.value p.value
## ma1 0.5566 0.0311 17.9219 0.0000
## ma2 0.1403 0.0371 3.7826 0.0002
## ma3 0.0072 0.0310 0.2323 0.8164
## sma1 -0.9644 0.0157 -61.3036 0.0000
#3rd AR NOT significant
d2 = sarima(log(D$PM_Dongsi), p = 0, d = 0, q = 2, P = 0, D = 1, Q = 1, S = 14,
no.constant = TRUE)
## initial value 0.217436
## iter 2 value -0.121594
## iter 3 value -0.151720
## iter 4 value -0.182885
## iter 5 value -0.191373
## iter 6 value -0.192353
## iter 7 value -0.193151
## iter 8 value -0.193245
## iter 9 value -0.193246
## iter 10 value -0.193248
## iter 11 value -0.193248
## iter 12 value -0.193248
## iter 12 value -0.193248
## iter 12 value -0.193248
## final value -0.193248
## converged
## initial value -0.211585
## iter 2 value -0.222986
## iter 3 value -0.223498
## iter 4 value -0.223809
## iter 5 value -0.223827
## iter 6 value -0.223827
## iter 7 value -0.223827
## iter 8 value -0.223827
## iter 8 value -0.223827
## final value -0.223827
## converged
d2$ttable
## Estimate SE t.value p.value
## ma1 0.5552 0.0303 18.2962 0
## ma2 0.1354 0.0304 4.4510 0
## sma1 -0.9641 0.0156 -61.6253 0
#Adding P = 1 to see what happens
d3 = sarima(log(D$PM_Dongsi), p = 0, d = 0, q = 2, P = 1, D = 1, Q = 1, S = 14,
no.constant = TRUE)
## initial value 0.218090
## iter 2 value -0.113546
## iter 3 value -0.160070
## iter 4 value -0.191309
## iter 5 value -0.198072
## iter 6 value -0.200075
## iter 7 value -0.201439
## iter 8 value -0.202096
## iter 9 value -0.202202
## iter 10 value -0.202204
## iter 10 value -0.202204
## final value -0.202204
## converged
## initial value -0.212906
## iter 2 value -0.220794
## iter 3 value -0.221575
## iter 4 value -0.223488
## iter 5 value -0.223706
## iter 6 value -0.223839
## iter 7 value -0.223842
## iter 8 value -0.223842
## iter 9 value -0.223842
## iter 9 value -0.223842
## iter 9 value -0.223842
## final value -0.223842
## converged
d3$ttable
## Estimate SE t.value p.value
## ma1 0.5552 0.0303 18.2987 0.0000
## ma2 0.1352 0.0304 4.4417 0.0000
## sar1 -0.0058 0.0331 -0.1747 0.8614
## sma1 -0.9631 0.0166 -58.1118 0.0000
#Original Idea
RMSE1 = tsRMSE(log(D$PM_Dongsi), 365, prob = 0.8, q = 3, D = 1, Q = 1, S = 14)
#3rd AR NOT significant
RMSE2 = tsRMSE(log(D$PM_Dongsi), 365, prob = 0.8, q = 2, D = 1, Q = 1, S = 14)
#Adding P = 1 to see what happens
RMSE3 = tsRMSE(log(D$PM_Dongsi), 365, prob = 0.8, q = 2, P = 1, D = 1, Q = 1, S = 14)
#Un-logging the RMSE
#Original Idea
#RMSE
exp(RMSE1[[1]])
## [1] 2.795435
#RMSE Obs. 1:5
exp(RMSE1[[2]])
## [1] 1.761661
#3rd AR NOT significant
#RMSE
exp(RMSE2[[1]])
## [1] 2.796207
#RMSE Obs. 1:5
exp(RMSE2[[2]])
## [1] 1.766332
#Adding P = 1 to see what happens
#RMSE
exp(RMSE3[[1]])
## [1] 2.796215
#RMSE Obs. 1:5
exp(RMSE3[[2]])
## [1] 1.766264
#Original Idea
Box.test(d1$fit$residuals, lag = 10, fitdf = 0, type = "Lj")
##
## Box-Ljung test
##
## data: d1$fit$residuals
## X-squared = 9.3977, df = 10, p-value = 0.4948
#3rd AR NOT significant
Box.test(d2$fit$residuals, lag = 10, fitdf = 0, type = "Lj")
##
## Box-Ljung test
##
## data: d2$fit$residuals
## X-squared = 9.4653, df = 10, p-value = 0.4886
#Adding P = 1 to see what happens
Box.test(d3$fit$residuals, lag = 10, fitdf = 0, type = "Lj")
##
## Box-Ljung test
##
## data: d3$fit$residuals
## X-squared = 9.4412, df = 10, p-value = 0.4908
\[ \boxed{\text{SARIMA}(0,0,2)\times(0,1,1)_{14}} \]
par(mfrow = c(1,2))
#5 Days Ahead
pred1 = sarima.for(log(D$PM_Dongsi), p = 0, d = 0, q = 2, P = 0, D = 1, Q = 1, S = 14,
no.constant = TRUE, n.ahead = 5)
#1 Month Ahead
pred2 = sarima.for(log(D$PM_Dongsi), p = 0, d = 0, q = 2, P = 0, D = 1, Q = 1, S = 14,
no.constant = TRUE, n.ahead = 31)
#5 Days Ahead
par(mfrow = c(1,2))
plot(D$Date, rep(0,nrow(D)), col = "white", xlim = c(D$Date[1], as.Date("2016-01-05")),
ylim = c(0,600), xlab = "Date", ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"),
main = "Five Days Ahead")
grid()
box()
lines(D$Date, D$PM_Dongsi)
lines(seq.Date(as.Date("2016-01-01"), as.Date("2016-01-05"), "day"), exp(pred1$pred),
col = "red")
#1 Month Ahead
plot(D$Date, rep(0,nrow(D)), col = "white", xlim = c(D$Date[1], as.Date("2016-01-31")),
ylim = c(0,600), xlab = "Date", ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"),
main = "One Month Ahead")
grid()
box()
lines(D$Date, D$PM_Dongsi)
lines(seq.Date(as.Date("2016-01-01"), as.Date("2016-01-31"), "day"), exp(pred2$pred),
col = "red")
#5 Days Ahead
par(mfrow = c(1,2))
plot(D$Date[893:1076], rep(0,184), col = "white", xlim = c(D$Date[893], as.Date("2016-01-05")),
ylim = c(0,600), xlab = "2015", ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"),
main = "Five Days Ahead")
grid()
box()
lines(D$Date[893:1076], D$PM_Dongsi[893:1076])
lines(seq.Date(as.Date("2016-01-01"), as.Date("2016-01-05"), "day"), exp(pred1$pred),
col = "red")
#1 Month Ahead
plot(D$Date[893:1076], rep(0,184), col = "white", xlim = c(D$Date[893], as.Date("2016-01-31")),
ylim = c(0,600), xlab = "2015-2016", ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"),
main = "One Month Ahead")
grid()
box()
lines(D$Date[893:1076], D$PM_Dongsi[893:1076])
lines(seq.Date(as.Date("2016-01-01"), as.Date("2016-01-31"), "day"), exp(pred2$pred),
col = "red")
#Next 5 Day Predictions
data.frame(Date = seq.Date(as.Date("2016/1/1"), as.Date("2016/1/5"), "days"),
PM2.5_Prediction = exp(pred1$pred[1:5]))
## Date PM2.5_Prediction
## 1 2016-01-01 61.71153
## 2 2016-01-02 56.02265
## 3 2016-01-03 57.29000
## 4 2016-01-04 61.34806
## 5 2016-01-05 61.87760
#Next Month (Jan. 2016) Predictions
data.frame(Date = seq.Date(as.Date("2016/1/1"), as.Date("2016/1/31"), "days"),
PM2.5_Prediction = exp(pred2$pred[1:31]))
## Date PM2.5_Prediction
## 1 2016-01-01 61.71153
## 2 2016-01-02 56.02265
## 3 2016-01-03 57.29000
## 4 2016-01-04 61.34806
## 5 2016-01-05 61.87760
## 6 2016-01-06 58.92754
## 7 2016-01-07 55.25888
## 8 2016-01-08 73.16011
## 9 2016-01-09 77.19976
## 10 2016-01-10 68.69977
## 11 2016-01-11 64.55527
## 12 2016-01-12 64.34653
## 13 2016-01-13 51.88646
## 14 2016-01-14 52.89140
## 15 2016-01-15 53.07031
## 16 2016-01-16 52.61497
## 17 2016-01-17 57.29000
## 18 2016-01-18 61.34806
## 19 2016-01-19 61.87760
## 20 2016-01-20 58.92754
## 21 2016-01-21 55.25888
## 22 2016-01-22 73.16011
## 23 2016-01-23 77.19976
## 24 2016-01-24 68.69977
## 25 2016-01-25 64.55527
## 26 2016-01-26 64.34653
## 27 2016-01-27 51.88646
## 28 2016-01-28 52.89140
## 29 2016-01-29 53.07031
## 30 2016-01-30 52.61497
## 31 2016-01-31 57.29000
#Log transformation of original data
tsplot(D$Date, log(D$PM_Dongsi), type = "l", xlab = "Date Recorded",
ylab = TeX(r"(log(PM2.5 Concentrate); $\mu g/m^3$)"), main = "Dongsi")
par(mfrow = c(2,2))
acf(log(D$PM_Dongsi), main = "")
pacf(log(D$PM_Dongsi), ylab = "PACF", main = "")
#Squared Log transformation of original data
acf(log(D$PM_Dongsi)^2, main = "")
pacf(log(D$PM_Dongsi)^2, ylab = "PACF", main = "")
#ARMA(2,2)-ARCH(2)
d4 = garchFit(~arma(2,2) + garch(2,0), log(Dongsi$PM_Dongsi))
#ARMA(2,1)-ARCH(2)
d5 = garchFit(~arma(2,1) + garch(2,0), log(Dongsi$PM_Dongsi))
#ARMA(1,1)-ARCH(2)
d6 = garchFit(~arma(1,1) + garch(2,0), log(Dongsi$PM_Dongsi))
#ARMA(1,1)-GARCH(2,1)
d7 = garchFit(~arma(1,1) + garch(2,1), log(Dongsi$PM_Dongsi))
#ARMA(1,1)-GARCH(1,2)
d8 = garchFit(~arma(1,1) + garch(1,2), log(Dongsi$PM_Dongsi))
#ARMA(2,2)-ARCH(2)
summary(d4)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~arma(2, 2) + garch(2, 0), data = log(Dongsi$PM_Dongsi))
##
## Mean and Variance Equation:
## data ~ arma(2, 2) + garch(2, 0)
## <environment: 0x7ff19adeaf78>
## [data = log(Dongsi$PM_Dongsi)]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu ar1 ar2 ma1 ma2 omega
## 0.17950816 0.99999999 -0.03850724 0.18281760 0.00098834 0.04809046
## alpha1 alpha2
## 0.69700780 0.24812296
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 0.1795082 0.0361128 4.971 6.67e-07 ***
## ar1 1.0000000 0.1916205 5.219 1.80e-07 ***
## ar2 -0.0385072 0.1840711 -0.209 0.834
## ma1 0.1828176 0.1916855 0.954 0.340
## ma2 0.0009883 0.0435462 0.023 0.982
## omega 0.0480905 0.0007997 60.137 < 2e-16 ***
## alpha1 0.6970078 0.0218844 31.850 < 2e-16 ***
## alpha2 0.2481230 0.0124868 19.871 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## -5826.505 normalized: -0.2325764
##
## Description:
## Mon Nov 28 18:35:03 2022 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 978500.3 0
## Shapiro-Wilk Test R W NA NA
## Ljung-Box Test R Q(10) 61.5835 1.81569e-09
## Ljung-Box Test R Q(15) 82.78766 2.152045e-11
## Ljung-Box Test R Q(20) 89.47753 9.147039e-11
## Ljung-Box Test R^2 Q(10) 48.51516 4.997024e-07
## Ljung-Box Test R^2 Q(15) 82.3545 2.585254e-11
## Ljung-Box Test R^2 Q(20) 537.498 0
## LM Arch Test R TR^2 48.39568 2.666963e-06
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## 0.4657916 0.4683873 0.4657913 0.4666316
#ARMA(2,1)-ARCH(2)
summary(d5)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~arma(2, 1) + garch(2, 0), data = log(Dongsi$PM_Dongsi))
##
## Mean and Variance Equation:
## data ~ arma(2, 1) + garch(2, 0)
## <environment: 0x7ff1858a12a8>
## [data = log(Dongsi$PM_Dongsi)]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu ar1 ar2 ma1 omega alpha1 alpha2
## 0.179409 1.000000 -0.038479 0.182290 0.048085 0.697309 0.248019
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 0.1794092 0.0106603 16.830 < 2e-16 ***
## ar1 1.0000000 0.0460549 21.713 < 2e-16 ***
## ar2 -0.0384794 0.0445598 -0.864 0.388
## ma1 0.1822895 0.0413901 4.404 1.06e-05 ***
## omega 0.0480849 0.0007994 60.148 < 2e-16 ***
## alpha1 0.6973091 0.0218667 31.889 < 2e-16 ***
## alpha2 0.2480193 0.0124810 19.872 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## -5826.509 normalized: -0.2325766
##
## Description:
## Mon Nov 28 18:35:14 2022 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 978129.5 0
## Shapiro-Wilk Test R W NA NA
## Ljung-Box Test R Q(10) 61.84322 1.620704e-09
## Ljung-Box Test R Q(15) 83.11141 1.876177e-11
## Ljung-Box Test R Q(20) 89.79035 8.065604e-11
## Ljung-Box Test R^2 Q(10) 48.53828 4.948602e-07
## Ljung-Box Test R^2 Q(15) 82.32485 2.617906e-11
## Ljung-Box Test R^2 Q(20) 537.7932 0
## LM Arch Test R TR^2 48.42046 2.640567e-06
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## 0.4657121 0.4679834 0.4657119 0.4664472
#ARMA(1,1)-ARCH(2)
summary(d6)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~arma(1, 1) + garch(2, 0), data = log(Dongsi$PM_Dongsi))
##
## Mean and Variance Equation:
## data ~ arma(1, 1) + garch(2, 0)
## <environment: 0x7ff18448cf40>
## [data = log(Dongsi$PM_Dongsi)]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu ar1 ma1 omega alpha1 alpha2
## 0.185486 0.960284 0.217127 0.048045 0.699624 0.247132
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 0.185486 0.008095 22.91 <2e-16 ***
## ar1 0.960284 0.001827 525.46 <2e-16 ***
## ma1 0.217127 0.008121 26.74 <2e-16 ***
## omega 0.048045 0.000797 60.28 <2e-16 ***
## alpha1 0.699624 0.021687 32.26 <2e-16 ***
## alpha2 0.247132 0.012423 19.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## -5827.065 normalized: -0.2325988
##
## Description:
## Mon Nov 28 18:35:22 2022 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 974797.9 0
## Shapiro-Wilk Test R W NA NA
## Ljung-Box Test R Q(10) 64.68478 4.656812e-10
## Ljung-Box Test R Q(15) 86.66331 4.140244e-12
## Ljung-Box Test R Q(20) 93.20554 2.028688e-11
## Ljung-Box Test R^2 Q(10) 48.76742 4.493073e-07
## Ljung-Box Test R^2 Q(15) 82.07812 2.905931e-11
## Ljung-Box Test R^2 Q(20) 540.2474 0
## LM Arch Test R TR^2 48.66476 2.393706e-06
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## 0.4656766 0.4676234 0.4656765 0.4663067
#ARMA(1,1)-GARCH(2,1)
summary(d7)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~arma(1, 1) + garch(2, 1), data = log(Dongsi$PM_Dongsi))
##
## Mean and Variance Equation:
## data ~ arma(1, 1) + garch(2, 1)
## <environment: 0x7ff185278710>
## [data = log(Dongsi$PM_Dongsi)]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu ar1 ma1 omega alpha1 alpha2
## 0.15276729 0.96774528 0.17358902 0.01453097 0.47961127 0.00000001
## beta1
## 0.54392546
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 1.528e-01 8.617e-03 17.728 < 2e-16 ***
## ar1 9.677e-01 1.894e-03 511.074 < 2e-16 ***
## ma1 1.736e-01 8.863e-03 19.586 < 2e-16 ***
## omega 1.453e-02 1.853e-03 7.841 4.44e-15 ***
## alpha1 4.796e-01 1.899e-02 25.260 < 2e-16 ***
## alpha2 1.000e-08 6.202e-02 0.000 1
## beta1 5.439e-01 5.423e-02 10.031 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## -5042.155 normalized: -0.2012675
##
## Description:
## Mon Nov 28 18:35:29 2022 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 1688456 0
## Shapiro-Wilk Test R W NA NA
## Ljung-Box Test R Q(10) 63.74678 7.034529e-10
## Ljung-Box Test R Q(15) 96.42636 6.183942e-14
## Ljung-Box Test R Q(20) 101.4721 6.861178e-13
## Ljung-Box Test R^2 Q(10) 4.923583 0.8962239
## Ljung-Box Test R^2 Q(15) 5.480195 0.9872208
## Ljung-Box Test R^2 Q(20) 444.4295 0
## LM Arch Test R TR^2 5.142499 0.9530491
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## 0.4030939 0.4053652 0.4030938 0.4038290
#ARMA(1,1)-GARCH(1,2)
summary(d8)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~arma(1, 1) + garch(1, 2), data = log(Dongsi$PM_Dongsi))
##
## Mean and Variance Equation:
## data ~ arma(1, 1) + garch(1, 2)
## <environment: 0x7ff185e6c470>
## [data = log(Dongsi$PM_Dongsi)]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu ar1 ma1 omega alpha1 beta1 beta2
## 0.146942 0.969056 0.172765 0.015601 0.525757 0.258890 0.233322
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 0.1469419 0.0081145 18.11 <2e-16 ***
## ar1 0.9690559 0.0017940 540.18 <2e-16 ***
## ma1 0.1727649 0.0087783 19.68 <2e-16 ***
## omega 0.0156013 0.0005181 30.11 <2e-16 ***
## alpha1 0.5257568 0.0168382 31.22 <2e-16 ***
## beta1 0.2588898 0.0175805 14.73 <2e-16 ***
## beta2 0.2333222 0.0145669 16.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## -4945.903 normalized: -0.1974255
##
## Description:
## Mon Nov 28 18:35:37 2022 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 1700403 0
## Shapiro-Wilk Test R W NA NA
## Ljung-Box Test R Q(10) 67.12197 1.58871e-10
## Ljung-Box Test R Q(15) 103.2691 3.108624e-15
## Ljung-Box Test R Q(20) 108.9241 3.08642e-14
## Ljung-Box Test R^2 Q(10) 4.343368 0.9305305
## Ljung-Box Test R^2 Q(15) 4.825206 0.9935057
## Ljung-Box Test R^2 Q(20) 320.7671 0
## LM Arch Test R TR^2 4.73223 0.9663299
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## 0.3954098 0.3976811 0.3954096 0.3961448
#ARMA(2,2)-ARCH(2)
RMSE4 = garchRMSE(log(Dongsi$PM_Dongsi), freq = 365, p = 2, q = 2, alpha = 2)
#ARMA(2,1)-ARCH(2)
RMSE5 = garchRMSE(log(Dongsi$PM_Dongsi), freq = 365, p = 2, q = 1, alpha = 2)
#ARMA(1,1)-ARCH(2)
RMSE6 = garchRMSE(log(Dongsi$PM_Dongsi), freq = 365, p = 1, q = 1, alpha = 2)
#ARMA(1,1)-GARCH(2,1)
RMSE7 = garchRMSE(log(Dongsi$PM_Dongsi), freq = 365, p = 1, q = 1, alpha = 2, beta = 1)
#ARMA(1,1)-GARCH(1,2)
RMSE8 = garchRMSE(log(Dongsi$PM_Dongsi), freq = 365, p = 1, q = 1, alpha = 1, beta = 2)
#Un-logging the RMSE
#ARMA(2,2)-ARCH(2)
#RMSE
exp(RMSE4[[1]])
## [1] 4.346233
#RMSE Obs. 1:5
exp(RMSE4[[2]])
## [1] 1.784541
#ARMA(2,1)-ARCH(2)
#RMSE
exp(RMSE5[[1]])
## [1] 4.236637
#RMSE Obs. 1:5
exp(RMSE5[[2]])
## [1] 1.812809
#ARMA(1,1)-ARCH(2)
#RMSE
exp(RMSE6[[1]])
## [1] 4.263201
#RMSE Obs. 1:5
exp(RMSE6[[2]])
## [1] 1.801926
#ARMA(1,1)-GARCH(2,1)
#RMSE
exp(RMSE7[[1]])
## [1] 4.711729
#RMSE Obs. 1:5
exp(RMSE7[[2]])
## [1] 1.755274
#ARMA(1,1)-GARCH(1,2)
#RMSE
exp(RMSE8[[1]])
## [1] 4.808208
#RMSE Obs. 1:5
exp(RMSE8[[2]])
## [1] 1.752103
\[ \boxed{\text{ARMA}(1,1)-\text{GARCH}(1,2)} \]
par(mfrow = c(1,2))
#5 Days Ahead
pred3 = predict(d8, n.ahead = 5)
plot(D$Date, rep(0,nrow(D)), col = "white", xlim = c(D$Date[1], as.Date("2016-01-05")),
ylim = c(0,600), xlab = "Date", ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"),
main = "Five Days Ahead")
grid()
box()
lines(D$Date, D$PM_Dongsi)
lines(seq.Date(as.Date("2016-01-01"), as.Date("2016-01-05"), "day"), exp(pred3$meanForecast),
col = "red")
#1 Month Ahead
pred4 = predict(d8, n.ahead = 31)
plot(D$Date, rep(0,nrow(D)), col = "white", xlim = c(D$Date[1], as.Date("2016-01-31")),
ylim = c(0,600), xlab = "Date", ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"),
main = "One Month Ahead")
grid()
box()
lines(D$Date, D$PM_Dongsi)
lines(seq.Date(as.Date("2016-01-01"), as.Date("2016-01-31"), "day"), exp(pred4$meanForecast),
col = "red")
par(mfrow = c(1,2))
#5 Days Ahead
plot(D$Date[893:1076], rep(0,184), col = "white", xlim = c(D$Date[893], as.Date("2016-01-05")),
ylim = c(0,600), xlab = "2015", ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"),
main = "Five Days Ahead")
grid()
box()
lines(D$Date[893:1076], D$PM_Dongsi[893:1076])
lines(seq.Date(as.Date("2016-01-01"), as.Date("2016-01-05"), "day"), exp(pred3$meanForecast),
col = "red")
#1 Month Ahead
plot(D$Date[893:1076], rep(0,184), col = "white", xlim = c(D$Date[893], as.Date("2016-01-31")),
ylim = c(0,600), xlab = "2015-2016", ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"),
main = "One Month Ahead")
grid()
box()
lines(D$Date[893:1076], D$PM_Dongsi[893:1076])
lines(seq.Date(as.Date("2016-01-01"), as.Date("2016-01-31"), "day"), exp(pred4$meanForecast),
col = "red")
#Next 5 Day Predictions
data.frame(Date = seq.Date(as.Date("2016/1/1"), as.Date("2016/1/5"), "days"),
PM2.5_Prediction = exp(pred3$meanForecast[1:5]))
## Date PM2.5_Prediction
## 1 2016-01-01 206.5966
## 2 2016-01-02 202.9083
## 3 2016-01-03 199.3970
## 4 2016-01-04 196.0523
## 5 2016-01-05 192.8647
#Next Month (Jan. 2016) Predictions
data.frame(Date = seq.Date(as.Date("2016/1/1"), as.Date("2016/1/31"), "days"),
PM2.5_Prediction = exp(pred4$meanForecast[1:31]))
## Date PM2.5_Prediction
## 1 2016-01-01 206.5966
## 2 2016-01-02 202.9083
## 3 2016-01-03 199.3970
## 4 2016-01-04 196.0523
## 5 2016-01-05 192.8647
## 6 2016-01-06 189.8251
## 7 2016-01-07 186.9253
## 8 2016-01-08 184.1575
## 9 2016-01-09 181.5145
## 10 2016-01-10 178.9894
## 11 2016-01-11 176.5760
## 12 2016-01-12 174.2683
## 13 2016-01-13 172.0608
## 14 2016-01-14 169.9483
## 15 2016-01-15 167.9260
## 16 2016-01-16 165.9891
## 17 2016-01-17 164.1335
## 18 2016-01-18 162.3552
## 19 2016-01-19 160.6502
## 20 2016-01-20 159.0151
## 21 2016-01-21 157.4464
## 22 2016-01-22 155.9411
## 23 2016-01-23 154.4961
## 24 2016-01-24 153.1086
## 25 2016-01-25 151.7759
## 26 2016-01-26 150.4955
## 27 2016-01-27 149.2650
## 28 2016-01-28 148.0822
## 29 2016-01-29 146.9449
## 30 2016-01-30 145.8512
## 31 2016-01-31 144.7991
#SARIMA
par(mfrow = c(2,2))
#5 Days Ahead
plot(D$Date[893:1076], rep(0,184), col = "white", xlim = c(D$Date[893], as.Date("2016-01-05")),
ylim = c(0,600), xlab = "2015", ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"),
main = "SARIMA Five Days Ahead")
grid()
box()
lines(D$Date[893:1076], D$PM_Dongsi[893:1076])
lines(seq.Date(as.Date("2016-01-01"), as.Date("2016-01-05"), "day"), exp(pred1$pred),
col = "red")
#1 Month Ahead
plot(D$Date[893:1076], rep(0,184), col = "white", xlim = c(D$Date[893], as.Date("2016-01-31")),
ylim = c(0,600), xlab = "2015-2016", ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"),
main = "SARIMA One Month Ahead")
grid()
box()
lines(D$Date[893:1076], D$PM_Dongsi[893:1076])
lines(seq.Date(as.Date("2016-01-01"), as.Date("2016-01-31"), "day"), exp(pred2$pred),
col = "red")
#ARMA-GARCH
#5 Days Ahead
plot(D$Date[893:1076], rep(0,184), col = "white", xlim = c(D$Date[893], as.Date("2016-01-05")),
ylim = c(0,600), xlab = "2015", ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"),
main = "ARMA-GARCH Five Days Ahead")
grid()
box()
lines(D$Date[893:1076], D$PM_Dongsi[893:1076])
lines(seq.Date(as.Date("2016-01-01"), as.Date("2016-01-05"), "day"), exp(pred3$meanForecast),
col = "red")
#1 Month Ahead
plot(D$Date[893:1076], rep(0,184), col = "white", xlim = c(D$Date[893], as.Date("2016-01-31")),
ylim = c(0,600), xlab = "2015-2016", ylab = TeX(r"(PM2.5 Concentrate; $\mu g/m^3$)"),
main = "ARMA-GARCH One Month Ahead")
grid()
box()
lines(D$Date[893:1076], D$PM_Dongsi[893:1076])
lines(seq.Date(as.Date("2016-01-01"), as.Date("2016-01-31"), "day"), exp(pred4$meanForecast),
col = "red")
#Next 5 Day Predictions
data.frame(Date = seq.Date(as.Date("2016/1/1"), as.Date("2016/1/5"), "days"),
SARIMA_Pred = exp(pred1$pred[1:5]),
ARMA_GARCH_Pred = exp(pred3$meanForecast[1:5]))
## Date SARIMA_Pred ARMA_GARCH_Pred
## 1 2016-01-01 61.71153 206.5966
## 2 2016-01-02 56.02265 202.9083
## 3 2016-01-03 57.29000 199.3970
## 4 2016-01-04 61.34806 196.0523
## 5 2016-01-05 61.87760 192.8647
#Next Month (Jan. 2016) Predictions
data.frame(Date = seq.Date(as.Date("2016/1/1"), as.Date("2016/1/31"), "days"),
SARIMA_Pred = exp(pred2$pred[1:31]),
ARMA_GARCH_Pred = exp(pred4$meanForecast[1:31]))
## Date SARIMA_Pred ARMA_GARCH_Pred
## 1 2016-01-01 61.71153 206.5966
## 2 2016-01-02 56.02265 202.9083
## 3 2016-01-03 57.29000 199.3970
## 4 2016-01-04 61.34806 196.0523
## 5 2016-01-05 61.87760 192.8647
## 6 2016-01-06 58.92754 189.8251
## 7 2016-01-07 55.25888 186.9253
## 8 2016-01-08 73.16011 184.1575
## 9 2016-01-09 77.19976 181.5145
## 10 2016-01-10 68.69977 178.9894
## 11 2016-01-11 64.55527 176.5760
## 12 2016-01-12 64.34653 174.2683
## 13 2016-01-13 51.88646 172.0608
## 14 2016-01-14 52.89140 169.9483
## 15 2016-01-15 53.07031 167.9260
## 16 2016-01-16 52.61497 165.9891
## 17 2016-01-17 57.29000 164.1335
## 18 2016-01-18 61.34806 162.3552
## 19 2016-01-19 61.87760 160.6502
## 20 2016-01-20 58.92754 159.0151
## 21 2016-01-21 55.25888 157.4464
## 22 2016-01-22 73.16011 155.9411
## 23 2016-01-23 77.19976 154.4961
## 24 2016-01-24 68.69977 153.1086
## 25 2016-01-25 64.55527 151.7759
## 26 2016-01-26 64.34653 150.4955
## 27 2016-01-27 51.88646 149.2650
## 28 2016-01-28 52.89140 148.0822
## 29 2016-01-29 53.07031 146.9449
## 30 2016-01-30 52.61497 145.8512
## 31 2016-01-31 57.29000 144.7991
\[ \mathbf{\boxed{\text{ARMA}(1,1)-\text{GARCH}(1,2)}} \]